# import pandas as pd
import json
import time

# df = pd.read_json("hf://datasets/AdaptLLM/finance-tasks/Headline/test.json")
# df.dropna(subset=["input","goldIndex"])
def split_answer(content: str):
    """Split one text chunk into a ``(question, answer)`` pair.

    A chunk is accepted only when it ends with the literal answer ``"No"``
    or ``"Yes"``.  The question is the chunk with the answer and the two
    characters immediately preceding it removed (presumably a separator
    such as ``"? "`` -- TODO confirm against the dataset format).

    Chunks in which the answer directly follows a trailing ``"- No"``
    (i.e. the chunk ends with ``"- No No"`` or ``"- No Yes"``) are
    rejected: cutting a fixed width off such a chunk would slice into the
    ``"- No"`` text rather than isolate the question.  This looks like a
    multiple-choice option list -- verify against the dataset.

    Args:
        content: One chunk of an item's ``input`` text.

    Returns:
        ``(question, answer)`` with ``answer`` being ``"No"`` or ``"Yes"``,
        or ``None`` when the chunk is not an accepted question/answer pair.
    """
    if content.endswith("No"):
        if content.endswith("- No No"):
            return None
        # Drop "No" plus the two characters in front of it.
        return content[:-4], "No"
    if content.endswith("Yes"):
        if content.endswith("- No Yes"):
            return None
        # Drop "Yes" plus the two characters in front of it.
        return content[:-5], "Yes"
    return None


def extract_qa_pairs(data):
    """Collect question/answer records from every item in ``data``.

    Each item's ``input`` string is split on blank lines (``"\\n\\n"``) and
    every chunk accepted by :func:`split_answer` yields one record carrying
    the item's ``id``.

    Args:
        data: Iterable of mappings, each with an ``'id'`` key and a string
            ``'input'`` key.

    Returns:
        A ``(records, yes_count)`` tuple: ``records`` is a list of dicts
        with keys ``'id'``, ``'Question'`` and ``'Answer'``; ``yes_count``
        is the number of records whose answer is ``"Yes"``.

    Raises:
        KeyError: If an item lacks ``'id'`` or ``'input'``.
    """
    records = []
    yes_count = 0
    for item in data:
        item_id = item['id']
        for content in item['input'].split("\n\n"):
            parsed = split_answer(content)
            if parsed is None:
                continue
            question, answer = parsed
            if answer == "Yes":
                yes_count += 1
            records.append({
                'id': item_id,
                'Question': question,
                'Answer': answer,
            })
    return records, yes_count


def main(input_path="test.json", output_path="target.json"):
    """Convert ``input_path`` into a flat list of Q/A records.

    Loads the JSON document at ``input_path``, extracts the records with
    :func:`extract_qa_pairs`, prints the extraction time and the number of
    "Yes" answers, then writes the records as JSON to ``output_path``.

    Args:
        input_path: Path of the JSON file to read.
        output_path: Path of the JSON file to write.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(input_path, 'r', encoding="utf-8") as f:
        data = json.load(f)

    # Only the extraction is timed, not the file I/O.  A monotonic clock
    # keeps the measurement immune to system clock adjustments.
    started = time.perf_counter()
    result, cnt = extract_qa_pairs(data)
    print("Cost Time:{}".format(time.perf_counter() - started))
    print("Yes Answer Number:{}".format(cnt))

    with open(output_path, 'w', encoding="utf-8") as f:
        json.dump(result, f)


if __name__ == "__main__":
    main()
